###########################################################
#
#  This file:
#
#  1. computes average predicted probabilities of joining
#     a club using the observed value approach
#  2. produces Figure 3 shown in the paper 
#
###########################################################


setwd("...")


### Read data ###

# read variables

mf_orig <- read.csv("an_sample.csv", header = TRUE, encoding = "UTF-8")
# match coef names
names(mf_orig)[3:4] <- c("dist_med_part", "dist_med_kpart") 


# read regression estimates

names <- character(3)
coefs <- as.list(rep(NA, 3))

for (i in 1:3) {
  names[i] <- paste("M", i, sep = "")
  file <- paste("coefs_M", i, ".csv", sep = "")
  coefs[[i]] <- (read.csv(file, header = TRUE, encoding = "UTF-8"))
  rm(file)
}

names(coefs) <- names



### Define quantities ###

clubs <- levels(mf_orig$alt)
vars <- names(mf_orig)[-c(1, 2)] 
mainvars <- c("d_alter", 
              "ant_eigReg", 
              "ant_eigKonf", 
              "ant_eigStand", 
              "ant_eigParlAmt"
              )


# input: candidate values for all six predictors

x <- as.list(rep(NA, 6)) 

for (i in 1:2) {
  x[[i]] <- seq(100, 0, -1) / -100
}

x[[3]] <- seq(0, 50, 0.5) / 100

for (i in 4:6) {
  x[[i]] <- seq(0, 100, 1) / 100
}

# target: list of avg. pred. probability matrices

avgpr <- as.list(rep(NA, 6))

for (i in 1:length(avgpr)) {
  avgpr[[i]] <- matrix(NA, nrow = length(x[[i]]), 
                       ncol = length(clubs))
  colnames(avgpr[[i]]) <- clubs
}

# model frame for making the predictions

mf <- mf_orig[order(mf_orig$id, mf_orig$alt), ]  



### Make predictions ###

# Ideology 

for (n in 1:length(clubs)) {

  for (i in 1:length(x[[1]])) {

    print(paste(clubs[n], "prediction", i, "of", length(x)))
    
    # exp(xb)
    if(clubs[n] == "none") {
      mf$dist_med_kpart[mf$alt == clubs[n]] <- x[[1]][i] 
    } else {
      mf$dist_med_part[mf$alt == clubs[n]] <- x[[1]][i] 
    }
    mf$xb <- as.matrix(mf[vars]) %*% t(as.matrix(coefs$M1[vars]))
    mf$exb <- exp(mf$xb)

    # pr
    exb_sum <- tapply(mf$exb, mf$id, sum)
    mf$exb_sum <- rep(exb_sum, each = length(clubs))
    mf$pr <- mf$exb / mf$exb_sum
    avgpr[[1]][i, n] <- as.vector(tapply(mf$pr, mf$alt, mean))[n]

  }

  if(clubs[n] == "none") {
    mf$dist_med_kpart <- mf_orig$dist_med_kpart
  } else {
    mf$dist_med_part <- mf_orig$dist_med_part
  }

}

mf <- mf_orig[order(mf_orig$id, mf_orig$alt), ]


# Other variables

for (k in 2:length(avgpr)) {
  
  for (n in 1:length(clubs)) {

    for (i in 1:length(x[[k]])) {
    
      print(paste(mainvars[k - 1], clubs[n], "prediction", i))
    
      # exp(xb)
      mf[mf$alt == clubs[n], mainvars[k - 1]] <- x[[k]][i] 
      mf$xb <- as.matrix(mf[vars]) %*% t(as.matrix(coefs$M1[vars]))
      mf$exb <- exp(mf$xb)
    
      # pr
      exb_sum <- tapply(mf$exb, mf$id, sum)
      mf$exb_sum <- rep(exb_sum, each = length(clubs))
      mf$pr <- mf$exb / mf$exb_sum
      avgpr[[k]][i, n] <- as.vector(tapply(mf$pr, mf$alt, mean))[n]
    
    }

    mf[, mainvars[k - 1]] <- mf_orig[, mainvars[k - 1]]

  }

}

names(avgpr) <- c("Ideology", "Age", "Region", "Confession", 
                  "Nobility", "Prior Mandate")




### Display results ###

# ideological similarity from 0 to 1

for (i in 1:2) {
  x[[i]] <- x[[i]] + 1
}

# labels

xlabel <- c("Ideological similarity to party median", 
            "Age similarity to party mean", 
            "Share own region in party",
            "Share own confession in party",
            "Share own noble status in party",
            "Share own parl. experience in party")
ylabel <- c(rep(c("Avg. Probability", "", ""), 3))

# prepare rug

mf_orig$dist_med_all = mf_orig$dist_med_part + mf_orig$dist_med_kpart
mainvars_orig <- mf_orig[, c("dist_med_all", mainvars)]  
mainvars_orig$dist_med_all <- mainvars_orig$dist_med_all + 1
mainvars_orig$d_alter <- mainvars_orig$d_alter + 1

# main plots

win.metafile("effectSizes.emf", 
             width = 10, 
             height = 6.5, 
             pointsize = 17
             )

par(mfrow = c(2, 3))

for (i in 1:length(avgpr)) {
  
  plot(x[[i]], apply(avgpr[[i]], 1, mean), 
       type = "n", axes = FALSE, 
       ylim = c(0, 0.6), main = names(avgpr)[i], 
       xlab = xlabel[i], ylab = ylabel[i])
  axis(1, at = c(0, 0.5, 1))
  axis(2, at = seq(0, 0.6, 0.2))

  for (n in 1:length(clubs)) {

    lines(x[[i]], avgpr[[i]][, n], col = "grey70", lwd = 1.5)

  }

  lines(x[[i]], apply(avgpr[[i]], 1, mean), lwd = 2)
  rug(mainvars_orig[, i])

}

dev.off()

par(mfrow = c(1, 1))
